HAMMER 40F/Many: Inode/link-count sequencer cleanup pass, UNDO cache.
author    Matthew Dillon <dillon@dragonflybsd.org>
          Sat, 3 May 2008 20:21:20 +0000 (20:21 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
          Sat, 3 May 2008 20:21:20 +0000 (20:21 +0000)
* Implement an UNDO cache.  If we have already laid down an UNDO in the
  current flush cycle we do not have to lay down another one for the same
  address range.  This greatly reduces the number of UNDOs we generate
  during a flush.
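
  For illustration, the fast path this adds to hammer_generate_undo()
  (taken verbatim from the hammer_undo.c hunk below):

	/*
	 * hammer_enter_undo_history() returns EALREADY when the
	 * requested range is already covered by an undo laid down
	 * earlier in the same flush cycle, so no new UNDO record
	 * is needed.
	 */
	if (hammer_enter_undo_history(trans->hmp, zone_off, len) == EALREADY) {
		return(0);
	}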

* Properly acquire the vnode in order to be able to issue vfsync() calls
  from the backend.  We may also have to acquire the vnode when doing an
  unload check for a file deletion.
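
  An abridged sketch of the resulting logic in
  hammer_inode_unloadable_check() (see the hammer_inode.c hunk below);
  the new getvp argument tells the routine to acquire the vnode itself
  before truncating the buffer cache:

	vp = NULL;
	if (getvp) {
		if (hammer_get_vnode(ip, &vp) != 0)
			return;		/* cannot acquire the vnode, punt */
	}
	if (ip->vp) {
		vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
		vnode_pager_setsize(ip->vp, 0);
	}
	if (getvp)
		vput(vp);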

* Properly generate UNDO records for the volume header.  During crash recovery
  we have to UNDO the volume header along with any partially written
  meta-data, because the volume header refers to the meta-data.
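
  For illustration, the blockmap sync in hammer_flusher_finalize() (see
  the hammer_flusher.c hunk below) now runs through hammer_modify_volume(),
  which lays down the UNDO before the copy, instead of doing a bare
  bcopy() into the header:

	if (root_volume->io.modified) {
		hammer_modify_volume(trans, root_volume,
				    &root_volume->ondisk->vol0_blockmap,
				    sizeof(root_volume->ondisk->vol0_blockmap));
		bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
		      sizeof(root_volume->ondisk->vol0_blockmap));
		hammer_modify_volume_done(root_volume);
	}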

* Add another record type, GENERAL, representing inode or softlink records.
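
  Such records are tagged explicitly when allocated, e.g. when syncing
  an inode record or laying down a softlink (hammer_inode.c and
  hammer_vnops.c hunks below):

	record = hammer_alloc_mem_record(ip);
	record->type = HAMMER_MEM_RECORD_GENERAL;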

* Move the setting of HAMMER_INODE_WRITE_ALT to the backend, allowing
  the kernel to flush buffers up to the point where the backend syncs
  the inode.
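
  A sketch of the backend side in hammer_sync_inode() (hammer_inode.c
  hunk below): dirty buffers are queued via vfsync() first, and only
  then are further BIOs redirected to the alternative queue:

	if (ip->flags & HAMMER_INODE_VHELD)
		error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
	ip->flags |= HAMMER_INODE_WRITE_ALT;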

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_freemap.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_undo.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 5271ab6..914e269 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.58 2008/05/03 07:59:06 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.59 2008/05/03 20:21:20 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -242,7 +242,7 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_INODE_DELETED   0x0080  /* inode delete (backend) */
 #define HAMMER_INODE_DELONDISK 0x0100  /* delete synchronized to disk */
 #define HAMMER_INODE_RO                0x0200  /* read-only (because of as-of) */
-#define HAMMER_INODE_UNUSED0400        0x0400
+#define HAMMER_INODE_VHELD     0x0400  /* vnode held on sync */
 #define HAMMER_INODE_DONDISK   0x0800  /* data records may be on disk */
 #define HAMMER_INODE_BUFS      0x1000  /* dirty high level bps present */
 #define HAMMER_INODE_REFLUSH   0x2000  /* pipelined flush during flush */
@@ -284,6 +284,7 @@ typedef struct hammer_inode *hammer_inode_t;
  * same time.
  */
 typedef enum hammer_record_type {
+       HAMMER_MEM_RECORD_GENERAL,      /* misc record */
        HAMMER_MEM_RECORD_ADD,          /* positive memory cache record */
        HAMMER_MEM_RECORD_DEL           /* negative delete-on-disk record */
 } hammer_record_type_t;
@@ -323,9 +324,11 @@ typedef struct hammer_record *hammer_record_t;
 struct hammer_volume;
 struct hammer_buffer;
 struct hammer_node;
+struct hammer_undo;
 RB_HEAD(hammer_vol_rb_tree, hammer_volume);
 RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
 RB_HEAD(hammer_nod_rb_tree, hammer_node);
+RB_HEAD(hammer_und_rb_tree, hammer_undo);
 
 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
              hammer_vol_rb_compare, int32_t);
@@ -333,6 +336,8 @@ RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
              hammer_buf_rb_compare, hammer_off_t);
 RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
              hammer_nod_rb_compare, hammer_off_t);
+RB_PROTOTYPE2(hammer_und_rb_tree, hammer_undo, rb_node,
+             hammer_und_rb_compare, hammer_off_t);
 
 /*
  * IO management - embedded at the head of various in-memory structures
@@ -501,6 +506,20 @@ typedef struct hammer_hole *hammer_hole_t;
 
 #include "hammer_cursor.h"
 
+/*
+ * Undo history tracking
+ */
+#define HAMMER_MAX_UNDOS       256
+
+struct hammer_undo {
+       RB_ENTRY(hammer_undo)   rb_node;
+       TAILQ_ENTRY(hammer_undo) lru_entry;
+       hammer_off_t            offset;
+       int                     bytes;
+};
+
+typedef struct hammer_undo *hammer_undo_t;
+
 /*
  * Internal hammer mount data structure
  */
@@ -510,6 +529,7 @@ struct hammer_mount {
        struct hammer_ino_rb_tree rb_inos_root;
        struct hammer_vol_rb_tree rb_vols_root;
        struct hammer_nod_rb_tree rb_nods_root;
+       struct hammer_und_rb_tree rb_undo_root;
        struct hammer_volume *rootvol;
        struct hammer_base_elm root_btree_beg;
        struct hammer_base_elm root_btree_end;
@@ -545,7 +565,10 @@ struct hammer_mount {
        struct hammer_lock sync_lock;
        struct lock blockmap_lock;
        struct hammer_blockmap  blockmap[HAMMER_MAX_ZONES];
-       struct hammer_holes holes[HAMMER_MAX_ZONES];
+       struct hammer_holes     holes[HAMMER_MAX_ZONES];
+       struct hammer_undo      undos[HAMMER_MAX_UNDOS];
+       int                     undo_alloc;
+       TAILQ_HEAD(, hammer_undo)  undo_lru_list;
        TAILQ_HEAD(, hammer_inode) flush_list;
        TAILQ_HEAD(, hammer_objid_cache) objid_cache_list;
 };
@@ -567,6 +590,7 @@ extern struct vop_ops hammer_fifo_vops;
 extern struct bio_ops hammer_bioops;
 
 extern int hammer_debug_general;
+extern int hammer_debug_inode;
 extern int hammer_debug_locks;
 extern int hammer_debug_btree;
 extern int hammer_debug_tid;
@@ -585,8 +609,7 @@ extern int64_t hammer_contention_count;
 
 int    hammer_vop_inactive(struct vop_inactive_args *);
 int    hammer_vop_reclaim(struct vop_reclaim_args *);
-int    hammer_get_vnode(struct hammer_inode *ip, int lktype,
-                       struct vnode **vpp);
+int    hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp);
 struct hammer_inode *hammer_get_inode(hammer_transaction_t trans,
                        struct hammer_node **cache,
                        u_int64_t obj_id, hammer_tid_t asof, int flags,
@@ -640,6 +663,9 @@ hammer_tid_t hammer_alloc_objid(hammer_transaction_t trans, hammer_inode_t dip);
 void hammer_clear_objid(hammer_inode_t dip);
 void hammer_destroy_objid_cache(hammer_mount_t hmp);
 
+int hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset,
+                             int bytes);
+void hammer_clear_undo_history(hammer_mount_t hmp);
 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
 int hammer_get_dtype(u_int8_t obj_type);
 u_int8_t hammer_get_obj_type(enum vtype vtype);
@@ -765,7 +791,7 @@ int  hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
 void hammer_rel_inode(hammer_inode_t ip, int flush);
 int hammer_sync_inode(hammer_inode_t ip);
 void hammer_test_inode(hammer_inode_t ip);
-void hammer_inode_unloadable_check(hammer_inode_t ip);
+void hammer_inode_unloadable_check(hammer_inode_t ip, int getvp);
 
 int  hammer_ip_add_directory(struct hammer_transaction *trans,
                        hammer_inode_t dip, struct namecache *ncp,
@@ -865,3 +891,8 @@ hammer_modify_record_done(hammer_buffer_t buffer)
        hammer_modify_buffer_done(buffer);
 }
 
+#define hammer_modify_volume_field(trans, vol, field)          \
+       hammer_modify_volume(trans, vol, &(vol)->ondisk->field, \
+                            sizeof((vol)->ondisk->field))
+
+
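
For illustration, a typical call site of the new hammer_modify_volume_field()
wrapper, as it appears in the hammer_btree.c hunk below:

	hammer_modify_volume_field(cursor->trans, volume, vol0_btree_root);
	volume->ondisk->vol0_btree_root = parent->node_offset;
	hammer_modify_volume_done(volume);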
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 42314ac..8812691 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.40 2008/05/02 01:00:42 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.41 2008/05/03 20:21:20 dillon Exp $
  */
 
 /*
@@ -1427,9 +1427,8 @@ btree_split_internal(hammer_cursor_t cursor)
                volume = hammer_get_root_volume(hmp, &error);
                KKASSERT(error == 0);
 
-               hammer_modify_volume(cursor->trans, volume,
-                                    &volume->ondisk->vol0_btree_root,
-                                    sizeof(hammer_off_t));
+               hammer_modify_volume_field(cursor->trans, volume,
+                                          vol0_btree_root);
                volume->ondisk->vol0_btree_root = parent->node_offset;
                hammer_modify_volume_done(volume);
                node->ondisk->parent = parent->node_offset;
@@ -1646,9 +1645,8 @@ btree_split_leaf(hammer_cursor_t cursor)
                volume = hammer_get_root_volume(hmp, &error);
                KKASSERT(error == 0);
 
-               hammer_modify_volume(cursor->trans, volume,
-                                    &volume->ondisk->vol0_btree_root,
-                                    sizeof(hammer_off_t));
+               hammer_modify_volume_field(cursor->trans, volume,
+                                          vol0_btree_root);
                volume->ondisk->vol0_btree_root = parent->node_offset;
                hammer_modify_volume_done(volume);
                leaf->ondisk->parent = parent->node_offset;
diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c
index d75c7e3..11a7bd3 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.10 2008/05/03 05:28:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.11 2008/05/03 20:21:20 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -46,8 +46,8 @@ static void hammer_flusher_thread(void *arg);
 static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
 static void hammer_flusher_flush(hammer_mount_t hmp);
 static int hammer_must_finalize_undo(hammer_mount_t hmp);
-static void hammer_flusher_finalize(hammer_mount_t hmp,
-                   hammer_volume_t root_volume, hammer_off_t start_offset);
+static void hammer_flusher_finalize(hammer_transaction_t trans,
+                   hammer_off_t start_offset);
 
 void
 hammer_flusher_sync(hammer_mount_t hmp)
@@ -163,13 +163,12 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
 static void
 hammer_flusher_flush(hammer_mount_t hmp)
 {
-       hammer_volume_t root_volume;
+       struct hammer_transaction trans;
        hammer_blockmap_t rootmap;
        hammer_inode_t ip;
        hammer_off_t start_offset;
-       int error;
 
-       root_volume = hammer_get_root_volume(hmp, &error);
+       hammer_start_transaction_fls(&trans, hmp);
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        start_offset = rootmap->next_offset;
 
@@ -193,12 +192,12 @@ hammer_flusher_flush(hammer_mount_t hmp)
                 */
                if (hammer_must_finalize_undo(hmp)) {
                        Debugger("Too many undos!!");
-                       hammer_flusher_finalize(hmp, root_volume, start_offset);
+                       hammer_flusher_finalize(&trans, start_offset);
                        start_offset = rootmap->next_offset;
                }
        }
-       hammer_flusher_finalize(hmp, root_volume, start_offset);
-       hammer_rel_volume(root_volume, 0);
+       hammer_flusher_finalize(&trans, start_offset);
+       hammer_done_transaction(&trans);
 }
 
 /*
@@ -234,17 +233,37 @@ hammer_must_finalize_undo(hammer_mount_t hmp)
  */
 static
 void
-hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume,
-                       hammer_off_t start_offset)
+hammer_flusher_finalize(hammer_transaction_t trans, hammer_off_t start_offset)
 {
+       hammer_mount_t hmp = trans->hmp;
+       hammer_volume_t root_volume = trans->rootvol;
        hammer_blockmap_t rootmap;
        hammer_io_t io;
 
        hammer_lock_ex(&hmp->sync_lock);
 
+       /*
+        * Sync the blockmap to the root volume ondisk buffer and generate
+        * the appropriate undo record.  We have to generate the UNDO even
+        * though we flush the volume header along with the UNDO fifo update
+        * because the meta-data (including the volume header) is flushed
+        * after the fifo update, not before.
+        */
+       if (root_volume->io.modified) {
+               hammer_modify_volume(trans, root_volume,
+                                   &root_volume->ondisk->vol0_blockmap,
+                                   sizeof(root_volume->ondisk->vol0_blockmap));
+               bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
+                     sizeof(root_volume->ondisk->vol0_blockmap));
+               hammer_modify_volume_done(root_volume);
+       }
+
        /*
         * Flush undo bufs
         */
+
+       hammer_clear_undo_history(hmp);
+
        while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                hammer_ref(&io->lock);
@@ -275,7 +294,11 @@ hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume,
        crit_exit();
 
        /*
-        * Update the volume header
+        * Move the undo FIFO's markers and flush the root volume header.
+        *
+        * If a crash occurs while the root volume header is being written
+        * we just have to hope that the undo range has been updated.  It
+        * should be done in one I/O but XXX this won't be perfect.
         */
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        if (rootmap->first_offset != start_offset) {
@@ -288,16 +311,8 @@ hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume,
                root_volume->ondisk->vol0_next_tid = hmp->next_tid;
                hammer_modify_volume_done(root_volume);
        }
-
-       /*
-        * Sync our cached blockmap array with the one in the root
-        * volume header.
-        */
-       if (root_volume->io.modified) {
-               bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
-                     sizeof(hmp->blockmap));
+       if (root_volume->io.modified)
                hammer_io_flush(&root_volume->io);
-       }
 
        /*
         * Wait for I/O to complete
@@ -309,7 +324,8 @@ hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume,
        crit_exit();
 
        /*
-        * Flush meta-data
+        * Flush meta-data.  The meta-data will be undone if we crash
+        * so we can safely flush it asynchronously.
         */
        while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c
index f15b352..780dd5c 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.8 2008/05/03 05:28:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.9 2008/05/03 20:21:20 dillon Exp $
  */
 
 /*
@@ -102,9 +102,9 @@ new_volume:
                                                     layer1, sizeof(*layer1));
                                --layer1->blocks_free;
                                hammer_modify_buffer_done(buffer1);
-                               hammer_modify_volume(trans, trans->rootvol,
-                                    &ondisk->vol0_stat_freebigblocks,
-                                    sizeof(ondisk->vol0_stat_freebigblocks));
+                               hammer_modify_volume_field(trans,
+                                                    trans->rootvol,
+                                                    vol0_stat_freebigblocks);
                                --ondisk->vol0_stat_freebigblocks;
                                hammer_modify_volume_done(trans->rootvol);
                                break;
@@ -174,9 +174,8 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset,
        layer2->u.owner = HAMMER_BLOCKMAP_FREE;
        hammer_modify_buffer_done(buffer2);
 
-       hammer_modify_volume(trans, trans->rootvol,
-                            &ondisk->vol0_stat_freebigblocks,
-                            sizeof(ondisk->vol0_stat_freebigblocks));
+       hammer_modify_volume_field(trans, trans->rootvol,
+                                  vol0_stat_freebigblocks);
        ++ondisk->vol0_stat_freebigblocks;
        hammer_modify_volume_done(trans->rootvol);
 
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index 502f6ec..f049032 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.48 2008/05/03 07:59:06 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.49 2008/05/03 20:21:20 dillon Exp $
  */
 
 #include "hammer.h"
@@ -68,7 +68,7 @@ hammer_vop_inactive(struct vop_inactive_args *ap)
         * fairly clean, try to recycle it immediately.  This can deadlock
         * in vfsync() if we aren't careful.
         */
-       hammer_inode_unloadable_check(ip);
+       hammer_inode_unloadable_check(ip, 0);
        if (ip->flags & HAMMER_INODE_MODMASK)
                hammer_flush_inode(ip, 0);
        else if (ip->ino_rec.ino_nlinks == 0)
@@ -107,7 +107,7 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap)
  * Called from the frontend.
  */
 int
-hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
+hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
 {
        struct vnode *vp;
        int error = 0;
@@ -412,6 +412,8 @@ retry:
                cursor->flags |= HAMMER_CURSOR_BACKEND;
 
                error = hammer_btree_lookup(cursor);
+               if (hammer_debug_inode)
+                       kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
                if (error) {
                        kprintf("error %d\n", error);
                        Debugger("hammer_update_inode");
@@ -419,6 +421,8 @@ retry:
 
                if (error == 0) {
                        error = hammer_ip_delete_record(cursor, trans->tid);
+                       if (hammer_debug_inode)
+                               kprintf(" error %d\n", error);
                        if (error && error != EDEADLK) {
                                kprintf("error %d\n", error);
                                Debugger("hammer_update_inode2");
@@ -426,12 +430,15 @@ retry:
                        if (error == 0) {
                                ip->flags |= HAMMER_INODE_DELONDISK;
                        }
-                       hammer_cache_node(cursor->node, &ip->cache[0]);
+                       if (cursor->node)
+                               hammer_cache_node(cursor->node, &ip->cache[0]);
                }
                if (error == EDEADLK) {
                        hammer_done_cursor(cursor);
                        error = hammer_init_cursor(trans, cursor,
                                                   &ip->cache[0], ip);
+                       if (hammer_debug_inode)
+                               kprintf("IPDED %p %d\n", ip, error);
                        if (error == 0)
                                goto retry;
                }
@@ -450,6 +457,7 @@ retry:
                 * Generate a record and write it to the media
                 */
                record = hammer_alloc_mem_record(ip);
+               record->type = HAMMER_MEM_RECORD_GENERAL;
                record->flush_state = HAMMER_FST_FLUSH;
                record->rec.inode = ip->sync_ino_rec;
                record->rec.inode.base.base.create_tid = trans->tid;
@@ -458,11 +466,16 @@ retry:
                record->flags |= HAMMER_RECF_INTERLOCK_BE;
                for (;;) {
                        error = hammer_ip_sync_record_cursor(cursor, record);
+                       if (hammer_debug_inode)
+                               kprintf("GENREC %p rec %08x %d\n",      
+                                       ip, record->flags, error);
                        if (error != EDEADLK)
                                break;
                        hammer_done_cursor(cursor);
                        error = hammer_init_cursor(trans, cursor,
                                                   &ip->cache[0], ip);
+                       if (hammer_debug_inode)
+                               kprintf("GENREC reinit %d\n", error);
                        if (error)
                                break;
                }
@@ -484,6 +497,8 @@ retry:
                 * Finish up.
                 */
                if (error == 0) {
+                       if (hammer_debug_inode)
+                               kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
                        ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
                                            HAMMER_INODE_DDIRTY |
                                            HAMMER_INODE_ITIMES);
@@ -493,11 +508,14 @@ retry:
                         * Root volume count of inodes
                         */
                        if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
-                               hammer_modify_volume(trans, trans->rootvol,
-                                                    NULL, 0);
+                               hammer_modify_volume_field(trans,
+                                                          trans->rootvol,
+                                                          vol0_stat_inodes);
                                ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
                                hammer_modify_volume_done(trans->rootvol);
                                ip->flags |= HAMMER_INODE_ONDISK;
+                               if (hammer_debug_inode)
+                                       kprintf("NOWONDISK %p\n", ip);
                        }
                }
        }
@@ -592,7 +610,8 @@ hammer_rel_inode(struct hammer_inode *ip, int flush)
                         * Determine whether on-disk action is needed for
                         * the inode's final disposition.
                         */
-                       hammer_inode_unloadable_check(ip);
+                       KKASSERT(ip->vp == NULL);
+                       hammer_inode_unloadable_check(ip, 0);
                        if (ip->flags & HAMMER_INODE_MODMASK) {
                                hammer_flush_inode(ip, 0);
                        } else if (ip->lock.refs == 1) {
@@ -794,10 +813,10 @@ hammer_setup_parent_inodes(hammer_record_t record)
        /*
         * If the record is already flushing, is it in our flush group?
         *
-        * If it is in our flush group but it is a delete-on-disk, it
-        * does not improve our connectivity (return 0), and if the
-        * target inode is not trying to destroy itself we can't allow
-        * the operation yet anyway (the second return -1).
+        * If it is in our flush group but it is a general record or a 
+        * delete-on-disk, it does not improve our connectivity (return 0),
+        * and if the target inode is not trying to destroy itself we can't
+        * allow the operation yet anyway (the second return -1).
         */
        if (record->flush_state == HAMMER_FST_FLUSH) {
                if (record->flush_group != hmp->flusher_next) {
@@ -806,6 +825,7 @@ hammer_setup_parent_inodes(hammer_record_t record)
                }
                if (record->type == HAMMER_MEM_RECORD_ADD)
                        return(1);
+               /* GENERAL or DEL */
                return(0);
        }
 
@@ -872,8 +892,8 @@ hammer_setup_parent_inodes(hammer_record_t record)
                        return(1);
 
                /*
-                * The record is a delete-n-disk.  It does not contribute
-                * to our visibility.  We can still flush it.
+                * A general or delete-on-disk record does not contribute
+                * to our visibility.  We can still flush it, however.
                 */
                return(0);
        } else {
@@ -906,6 +926,15 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
        ip->flush_group = ip->hmp->flusher_next;
        ++ip->hmp->flusher_lock;
 
+       /*
+        * We need to be able to vfsync/truncate from the backend.
+        */
+       KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
+       if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
+               ip->flags |= HAMMER_INODE_VHELD;
+               vref(ip->vp);
+       }
+
        /*
         * Figure out how many in-memory records we can actually flush
         * (not including inode meta-data, buffers, etc).
@@ -927,6 +956,10 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
                if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
                        ip->flags |= HAMMER_INODE_REFLUSH;
                        ip->flush_state = HAMMER_FST_SETUP;
+                       if (ip->flags & HAMMER_INODE_VHELD) {
+                               ip->flags &= ~HAMMER_INODE_VHELD;
+                               vrele(ip->vp);
+                       }
                        if (flags & HAMMER_FLUSH_SIGNAL) {
                                ip->flags |= HAMMER_INODE_RESIGNAL;
                                hammer_flusher_async(ip->hmp);
@@ -937,29 +970,6 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
                }
        }
 
-#if 0
-       /*
-        * XXX - don't sync the buffer cache on the frontend, the backend
-        * will do it and we do not want to prematurely activate the backend.
-        *
-        * Sync the buffer cache if the caller wants to flush now, otherwise
-        * don't (any write bios will wake up the flusher).
-        */
-       if ((flags & HAMMER_FLUSH_RECURSION) == 0 &&
-           (flags & HAMMER_FLUSH_SIGNAL)) {
-               if (ip->vp != NULL)
-                       error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
-               else
-                       error = 0;
-       }
-
-       /*
-        * Any further strategy calls will go into the inode's alternative
-        * bioq.
-        */
-       ip->flags |= HAMMER_INODE_WRITE_ALT;
-#endif
-
        /*
         * Snapshot the state of the inode for the backend flusher.
         *
@@ -1071,10 +1081,11 @@ hammer_setup_child_callback(hammer_record_t rec, void *data)
                        r = 1;
                } else {
                        /*
-                        * XXX this needs help.  We have a delete-on-disk
-                        * which could disconnect the target.  If the target
-                        * has its own dependancies they really need to
-                        * be flushed.
+                        * General or delete-on-disk record.
+                        *
+                        * XXX this needs help.  If a delete-on-disk we could
+                        * disconnect the target.  If the target has its own
+                        * dependencies they really need to be flushed.
                         *
                         * XXX
                         */
@@ -1104,7 +1115,7 @@ hammer_setup_child_callback(hammer_record_t rec, void *data)
 void
 hammer_wait_inode(hammer_inode_t ip)
 {
-       while (ip->flush_state == HAMMER_FST_FLUSH) {
+       while (ip->flush_state != HAMMER_FST_IDLE) {
                ip->flags |= HAMMER_INODE_FLUSHW;
                tsleep(&ip->flags, 0, "hmrwin", 0);
        }
@@ -1184,6 +1195,14 @@ hammer_flush_inode_done(hammer_inode_t ip)
                ip->flush_state = HAMMER_FST_SETUP;
        }
 
+       /*
+        * Clean up the vnode ref
+        */
+       if (ip->flags & HAMMER_INODE_VHELD) {
+               ip->flags &= ~HAMMER_INODE_VHELD;
+               vrele(ip->vp);
+       }
+
        /*
         * If the frontend made more changes and requested another flush,
         * then try to get it running.
@@ -1258,8 +1277,12 @@ hammer_sync_record_callback(hammer_record_t record, void *data)
         * be visible to the frontend.
         */
        if (record->flags & HAMMER_RECF_DELETED_FE) {
-               KKASSERT(record->type == HAMMER_MEM_RECORD_ADD);
-               record->flags |= HAMMER_RECF_CONVERT_DELETE;
+               if (record->type == HAMMER_MEM_RECORD_ADD) {
+                       record->flags |= HAMMER_RECF_CONVERT_DELETE;
+               } else {
+                       KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
+                       return(0);
+               }
        }
 
        /*
@@ -1339,6 +1362,8 @@ hammer_sync_inode(hammer_inode_t ip)
                        case HAMMER_MEM_RECORD_DEL:
                                ++nlinks;
                                break;
+                       default:
+                               break;
                        }
                }
        }
@@ -1353,10 +1378,10 @@ hammer_sync_inode(hammer_inode_t ip)
        }
 
        /*
-        * Queue up any pending dirty buffers then set a flag to cause
-        * any further BIOs to go to the alternative queue.
+        * Queue up as many dirty buffers as we can then set a flag to
+        * cause any further BIOs to go to the alternative queue.
         */
-       if (ip->vp)
+       if (ip->flags & HAMMER_INODE_VHELD)
                error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
        ip->flags |= HAMMER_INODE_WRITE_ALT;
 
@@ -1471,7 +1496,8 @@ hammer_sync_inode(hammer_inode_t ip)
                        /*
                         * Adjust the inode count in the volume header
                         */
-                       hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
+                       hammer_modify_volume_field(&trans, trans.rootvol,
+                                                  vol0_stat_inodes);
                        --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
                        hammer_modify_volume_done(trans.rootvol);
                } else {
@@ -1591,21 +1617,31 @@ done:
  * it, which may mean destroying it on-media too.
  */
 void
-hammer_inode_unloadable_check(hammer_inode_t ip)
+hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
 {
+       struct vnode *vp;
+
        /*
         * If the inode is on-media and the link count is 0 we MUST delete
         * it on-media.  DELETING is a mod flag, DELETED is a state flag.
         */
        if (ip->ino_rec.ino_nlinks == 0 &&
            (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
+               ip->flags |= HAMMER_INODE_DELETING;
+               ip->flags |= HAMMER_INODE_TRUNCATED;
+               ip->trunc_off = 0;
+               vp = NULL;
+               if (getvp) {
+                       if (hammer_get_vnode(ip, &vp) != 0)
+                               return;
+               }
                if (ip->vp) {
                        vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
                        vnode_pager_setsize(ip->vp, 0);
                }
-               ip->flags |= HAMMER_INODE_DELETING;
-               ip->flags |= HAMMER_INODE_TRUNCATED;
-               ip->trunc_off = 0;
+               if (getvp) {
+                       vput(vp);
+               }
        }
 }
 
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index e237f31..d242914 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.52 2008/05/03 07:59:06 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.53 2008/05/03 20:21:20 dillon Exp $
  */
 
 #include "hammer.h"
@@ -601,7 +601,7 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
                if (ip->ino_rec.ino_nlinks == 0 &&
                    (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) {
                        hammer_done_cursor(cursor);
-                       hammer_inode_unloadable_check(ip);
+                       hammer_inode_unloadable_check(ip, 1);
                        hammer_flush_inode(ip, 0);
                }
 
@@ -832,6 +832,8 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
 
        for (;;) {
                error = hammer_btree_lookup(cursor);
+               if (hammer_debug_inode)
+                       kprintf("DOINSERT LOOKUP %d\n", error);
                if (error)
                        break;
                if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
@@ -889,6 +891,8 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
        /*
         * Fill in the remaining fields and insert our B-Tree node.
         */
+       if (hammer_debug_inode)
+               kprintf("COPYREC %p\n", rec);
        hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0);
        rec->base.base = record->rec.base.base;
        bcopy(&record->rec.base + 1, &rec->base + 1,
@@ -917,6 +921,8 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
        elm.leaf.data_crc = rec->base.data_crc;
 
        error = hammer_btree_insert(cursor, &elm);
+       if (hammer_debug_inode)
+               kprintf("BTREE INSERT error %d @ %016llx:%d\n", error, cursor->node->node_offset, cursor->index);
 
        /*
         * This occurs when the frontend creates a record and queues it to
diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c
index bfdeba1..43a3489 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.9 2008/05/03 05:28:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.10 2008/05/03 20:21:20 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -400,9 +400,8 @@ hammer_reblock_node(struct hammer_ioc_reblock *reblock,
                 volume = hammer_get_root_volume(cursor->trans->hmp, &error);
                 KKASSERT(error == 0);
 
-                hammer_modify_volume(cursor->trans, volume,
-                                    &volume->ondisk->vol0_btree_root,
-                                     sizeof(hammer_off_t));
+                hammer_modify_volume_field(cursor->trans, volume,
+                                          vol0_btree_root);
                 volume->ondisk->vol0_btree_root = nnode->node_offset;
                 hammer_modify_volume_done(volume);
                 hammer_rel_volume(volume, 0);
diff --git a/sys/vfs/hammer/hammer_undo.c b/sys/vfs/hammer/hammer_undo.c
index c40a815..9466ba1 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.9 2008/05/02 06:51:57 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.10 2008/05/03 20:21:20 dillon Exp $
  */
 
 /*
  * HAMMER undo - undo buffer/FIFO management.
  */
 
 #include "hammer.h"
 
+static int hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2);
+
+RB_GENERATE2(hammer_und_rb_tree, hammer_undo, rb_node,
+             hammer_und_rb_compare, hammer_off_t, offset);
+
 /*
  * Convert a zone-3 undo offset into a zone-2 buffer offset.
  */
@@ -72,6 +77,10 @@ hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
 /*
  * Generate an UNDO record for the block of data at the specified zone1
  * or zone2 offset.
+ *
+ * The recovery code will execute UNDOs in reverse order, allowing overlaps.
+ * All the UNDOs are executed together so if we already laid one down we
+ * do not have to lay another one down for the same range.
  */
 int
 hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io,
@@ -90,6 +99,14 @@ hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io,
        int error;
        int bytes;
 
+       /*
+        * Enter the offset into our undo history.  If there is an existing
+        * undo we do not have to generate a new one.
+        */
+       if (hammer_enter_undo_history(trans->hmp, zone_off, len) == EALREADY) {
+               return(0);
+       }
+
        root_volume = trans->rootvol;
        ondisk = root_volume->ondisk;
        undomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
@@ -190,6 +207,58 @@ again:
        return(error);
 }
 
+/*
+ * UNDO HISTORY API
+ *
+ * It is not necessary to layout an undo record for the same address space
+ * multiple times.  Maintain a cache of recent undo's.
+ */
+
+/*
+ * Enter an undo into the history.  Return EALREADY if the request completely
+ * covers a previous request.
+ */
+int
+hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, int bytes)
+{
+       hammer_undo_t node;
+       hammer_undo_t onode;
+
+       node = RB_LOOKUP(hammer_und_rb_tree, &hmp->rb_undo_root, offset);
+       if (node) {
+               TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
+               TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
+               if (bytes <= node->bytes)
+                       return(EALREADY);
+               node->bytes = bytes;
+               return(0);
+       }
+       if (hmp->undo_alloc != HAMMER_MAX_UNDOS) {
+               node = &hmp->undos[hmp->undo_alloc++];
+       } else {
+               node = TAILQ_FIRST(&hmp->undo_lru_list);
+               TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
+               RB_REMOVE(hammer_und_rb_tree, &hmp->rb_undo_root, node);
+       }
+       node->offset = offset;
+       node->bytes = bytes;
+       TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
+       onode = RB_INSERT(hammer_und_rb_tree, &hmp->rb_undo_root, node);
+       KKASSERT(onode == NULL);
+       return(0);
+}
+
+void
+hammer_clear_undo_history(hammer_mount_t hmp)
+{
+       RB_INIT(&hmp->rb_undo_root);
+       TAILQ_INIT(&hmp->undo_lru_list);
+       hmp->undo_alloc = 0;
+}
+
+/*
+ * Misc helper routines.  Return available space and total space.
+ */
 int64_t
 hammer_undo_space(hammer_mount_t hmp)
 {
@@ -221,3 +290,13 @@ hammer_undo_max(hammer_mount_t hmp)
        return(max_bytes);
 }
 
+static int
+hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2)
+{
+        if (node1->offset < node2->offset)
+                return(-1);
+        if (node1->offset > node2->offset)
+                return(1);
+        return(0);
+}
+
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index 351a863..a21e30c 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.31 2008/05/02 01:00:42 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.32 2008/05/03 20:21:20 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -48,6 +48,7 @@
 #include "hammer.h"
 
 int hammer_debug_general;
+int hammer_debug_inode;
 int hammer_debug_locks;
 int hammer_debug_btree;
 int hammer_debug_tid;
@@ -68,6 +69,8 @@ int64_t hammer_zone_limit;
 SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
           &hammer_debug_general, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_inode, CTLFLAG_RW,
+          &hammer_debug_inode, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_locks, CTLFLAG_RW,
           &hammer_debug_locks, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
@@ -199,6 +202,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
 
                TAILQ_INIT(&hmp->flush_list);
                TAILQ_INIT(&hmp->objid_cache_list);
+               TAILQ_INIT(&hmp->undo_lru_list);
 
                /*
                 * Set default zone limits.  This value can be reduced
@@ -243,6 +247,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
        RB_INIT(&hmp->rb_vols_root);
        RB_INIT(&hmp->rb_inos_root);
        RB_INIT(&hmp->rb_nods_root);
+       RB_INIT(&hmp->rb_undo_root);
        hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
 
        TAILQ_INIT(&hmp->volu_list);
@@ -488,7 +493,7 @@ hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
                *vpp = NULL;
                return(error);
        }
-       error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
+       error = hammer_get_vnode(ip, vpp);
        hammer_rel_inode(ip, 0);
        hammer_done_transaction(&trans);
        return (error);
@@ -604,7 +609,7 @@ hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
                *vpp = NULL;
                return(error);
        }
-       error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
+       error = hammer_get_vnode(ip, vpp);
        hammer_rel_inode(ip, 0);
        hammer_done_transaction(&trans);
        return (error);
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index e377b14..e0551cb 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.45 2008/05/03 05:28:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.46 2008/05/03 20:21:20 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -173,6 +173,7 @@ hammer_vop_fsync(struct vop_fsync_args *ap)
        hammer_inode_t ip = VTOI(ap->a_vp);
 
        hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
+       vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
        if (ap->a_waitfor == MNT_WAIT)
                hammer_wait_inode(ip);
        return (ip->error);
@@ -510,7 +511,7 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap)
                hammer_done_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
-               error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
+               error = hammer_get_vnode(nip, ap->a_vpp);
                hammer_done_transaction(&trans);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
@@ -634,7 +635,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
                                      asof, flags, &error);
                if (error == 0) {
-                       error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
+                       error = hammer_get_vnode(ip, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        vp = NULL;
@@ -699,7 +700,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                ip = hammer_get_inode(&trans, &dip->cache[1],
                                      obj_id, asof, flags, &error);
                if (error == 0) {
-                       error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
+                       error = hammer_get_vnode(ip, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        kprintf("nresolve: lookup %s failed dip %p (%016llx) on"
@@ -772,7 +773,7 @@ hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
        ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
                              asof, dip->flags, &error);
        if (ip) {
-               error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp);
+               error = hammer_get_vnode(ip, ap->a_vpp);
                hammer_rel_inode(ip, 0);
        } else {
                *ap->a_vpp = NULL;
@@ -879,7 +880,7 @@ hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
                hammer_rel_inode(nip, 0);
                *ap->a_vpp = NULL;
        } else {
-               error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
+               error = hammer_get_vnode(nip, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
@@ -941,7 +942,7 @@ hammer_vop_nmknod(struct vop_nmknod_args *ap)
                hammer_rel_inode(nip, 0);
                *ap->a_vpp = NULL;
        } else {
-               error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
+               error = hammer_get_vnode(nip, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
@@ -1541,6 +1542,7 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
         */
        if (error == 0) {
                record = hammer_alloc_mem_record(nip);
+               record->type = HAMMER_MEM_RECORD_GENERAL;
                bytes = strlen(ap->a_target);
 
                record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK;
@@ -1568,7 +1570,7 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
                hammer_rel_inode(nip, 0);
                *ap->a_vpp = NULL;
        } else {
-               error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
+               error = hammer_get_vnode(nip, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);