HAMMER 14/many - historical access cleanup, itimes, bug fixes.
author	Matthew Dillon <dillon@dragonflybsd.org>
Tue, 1 Jan 2008 01:00:03 +0000 (01:00 +0000)
committer	Matthew Dillon <dillon@dragonflybsd.org>
Tue, 1 Jan 2008 01:00:03 +0000 (01:00 +0000)
With this commit most historical accesses work and I can go through at least
two cycles of cpdup / rm -rf without crashing.

* Fix a bug in the B-Tree code related to searches on historical records.
  These records are differentiated by key.create_tid, but unlike the rest
  of the key this field cannot be matched exactly, so the matching algorithm
  has to be somewhat more sophisticated.  e.g. a search as-of time 10 must
  still match a record whose create_tid is 5 (the most recent record created
  at or before the as-of time).

  To make this work properly we use a trick when generating the separator
  during a leaf split; see hammer_btree.c / hammer_make_separator() and the
  sketch below.
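
  A minimal sketch of the new separator rule, paraphrasing the
  hammer_make_separator() change in the diff below (MID() stands in for the
  real MAKE_SEPARATOR macro):

      #define MID(k1, k2, dest, field) \
              (dest)->field = (k1)->field + (((k2)->field - (k1)->field + 1) >> 1)

      static void
      make_separator(hammer_base_elm_t key1, hammer_base_elm_t key2,
                     hammer_base_elm_t dest)
      {
              MID(key1, key2, dest, obj_id);
              MID(key1, key2, dest, rec_type);
              MID(key1, key2, dest, key);
              if (key1->obj_id == key2->obj_id &&
                  key1->rec_type == key2->rec_type &&
                  key1->key == key2->key) {
                      /*
                       * Purely historical split: only create_tid differs,
                       * so the separator's create_tid must equal the
                       * right-hand element's or a search cannot decide
                       * which branch to descend.
                       */
                      dest->create_tid = key2->create_tid;
              } else {
                      dest->create_tid = 0;
              }
      }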

* Recycle inodes with a link count of 0 immediately.

* Optimization: Do not flush backing store to disk on reclaim.

* Add a per-inode read-only flag.  Mark all historical (as-of) inodes as
  read-only.

* Implement read-only semantics in the vnops, returning EROFS on attempts
  to modify a read-only inode, and assert in hammer_modify_inode() that no
  internal path modifies an inode marked read-only.
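
  A sketch of the two layers of enforcement, condensed from the diff below
  (the EROFS test is repeated near the top of every modifying VOP; the
  assertion shown spells out the intent of the new KKASSERT in
  hammer_modify_inode()):

      /* pattern at the top of each modifying VOP (mkdir, create, rename...) */
      if (dip->flags & HAMMER_INODE_RO)
              return (EROFS);

      void
      hammer_modify_inode(struct hammer_transaction *trans,
                          struct hammer_inode *ip, int flags)
      {
              /* catch any internal path that slips past the vnops checks */
              KKASSERT((ip->flags & HAMMER_INODE_RO) == 0 ||
                       (flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
                                 HAMMER_INODE_DELETED |
                                 HAMMER_INODE_ITIMES)) == 0);

              if (flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
                           HAMMER_INODE_DELETED)) {
                      /* remember the TID for the eventual media sync */
                      ip->last_tid = trans->tid;
              }
              ip->flags |= flags;
      }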

* Properly record the last transaction id for use when synchronizing
  inodes to the platter.  There were a few cases when late synchronizations
  were using the wrong transaction id, breaking user expectations when
  accessing historical data after a sync.
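
  Concretely, hammer_sync_inode() now reuses the TID recorded when the inode
  was last modified instead of allocating a fresh one at sync time; a
  condensed excerpt from the diff below:

      /* sync using the TID of the last modifying operation, if any */
      if (ip->last_tid)
              hammer_start_transaction_tid(&trans, ip->hmp, ip->last_tid);
      else
              hammer_start_transaction(&trans, ip->hmp);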

* Update the itimes (atime and mtime).  These are non-historical updates by
  default (meaning we overwrite the latest inode record in place instead of
  deleting it and inserting a new one).  A future mount option will allow
  these updates to operate historically (the coding is trivial)... didn't
  you always want to know the last time a file was accessed prior to a
  particular date?  Think about it...
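
  The non-historical path is the new hammer_update_itimes(); condensed from
  the diff below, it looks up the existing on-disk inode record and
  overwrites just the time fields in its buffer rather than deleting and
  re-inserting the record:

      static int
      hammer_update_itimes(hammer_inode_t ip)
      {
              struct hammer_cursor cursor;
              struct hammer_inode_record *rec;
              int error;

              hammer_init_cursor_ip(&cursor, ip);
              cursor.key_beg.obj_id = ip->obj_id;
              cursor.key_beg.create_tid = ip->obj_asof;
              cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
              cursor.flags = HAMMER_CURSOR_GET_RECORD;

              error = hammer_btree_lookup(&cursor);
              if (error == 0) {
                      rec = &cursor.record->inode;
                      hammer_modify_buffer(cursor.record_buffer);
                      rec->ino_atime = ip->ino_rec.ino_atime;
                      rec->ino_mtime = ip->ino_rec.ino_mtime;
                      hammer_modify_buffer_done(cursor.record_buffer);
                      ip->flags &= ~HAMMER_INODE_ITIMES;
              }
              hammer_done_cursor(&cursor);
              return(error);
      }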

* Fix an inode memory leak.  The inode in-memory structure must be freed
  on last reference.  There were cases where it was being left in the
  HAMMER cache.

* Optimization: Reduce vnode scan overhead during 'sync' by improving the
  inode flags that indicate whether some sort of sync is required.
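
  In particular, clean inodes are now rejected immediately in
  hammer_sync_inode() by testing the combined dirty-state mask, which now
  also covers in-memory records via the new HAMMER_INODE_XDIRTY flag:

      if ((ip->flags & HAMMER_INODE_MODMASK) == 0)
              return(0);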

* Optimization: Don't flush inodes when their link count merely drops by 1;
  only try to flush them when their link count drops to 0.
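
  The directory-entry removal path now only kicks off a sync once the link
  count actually reaches zero and the OS holds no active reference; from
  hammer_ip_del_directory() in the diff below:

      if (error == 0) {
              --ip->ino_rec.ino_nlinks;
              hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
              if (ip->ino_rec.ino_nlinks == 0 &&
                  (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) {
                      hammer_sync_inode(ip, MNT_NOWAIT, 1);
              }
      }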

* Fix a couple of potential deadlocks.

* Fix a case in the vnops code where an inode was not being properly flagged
  as being dirty.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_transaction.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 858bbf3..57d9669 100644
--- a/sys/vfs/hammer/hammer.h
+++ b/sys/vfs/hammer/hammer.h
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.17 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.18 2008/01/01 01:00:03 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -165,15 +165,17 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_INODE_DDIRTY    0x0001  /* in-memory ino_data is dirty */
 #define HAMMER_INODE_RDIRTY    0x0002  /* in-memory ino_rec is dirty */
 #define HAMMER_INODE_ITIMES    0x0004  /* in-memory mtime/atime modified */
+#define HAMMER_INODE_XDIRTY    0x0008  /* in-memory records present */
 #define HAMMER_INODE_ONDISK    0x0010  /* inode is on-disk (else not yet) */
 #define HAMMER_INODE_FLUSH     0x0020  /* flush on last ref */
-#define HAMMER_INODE_TID       0x0040  /* update in-memory last_tid */
 #define HAMMER_INODE_DELETED   0x0080  /* inode ready for deletion */
 #define HAMMER_INODE_DELONDISK 0x0100  /* delete synchronized to disk */
+#define HAMMER_INODE_RO                0x0200  /* read-only (because of as-of) */
+#define HAMMER_INODE_GONE      0x0400  /* delete flushed out */
 
 #define HAMMER_INODE_MODMASK   (HAMMER_INODE_DDIRTY|HAMMER_INODE_RDIRTY| \
-                                HAMMER_INODE_ITIMES|HAMMER_INODE_FLUSH|  \
-                                HAMMER_INODE_DELETED)
+                                HAMMER_INODE_XDIRTY|\
+                                HAMMER_INODE_ITIMES|HAMMER_INODE_DELETED)
 
 #define HAMMER_MAX_INODE_CURSORS       4
 
@@ -434,6 +436,7 @@ extern struct hammer_alist_config Clu_slave_alist_config;
 extern struct bio_ops hammer_bioops;
 
 extern int hammer_debug_btree;
+extern int hammer_debug_tid;
 extern int hammer_count_inodes;
 extern int hammer_count_records;
 extern int hammer_count_record_datas;
@@ -450,12 +453,12 @@ int       hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
 int    hammer_get_vnode(struct hammer_inode *ip, int lktype,
                        struct vnode **vpp);
 struct hammer_inode *hammer_get_inode(hammer_mount_t hmp,
-                       u_int64_t obj_id, hammer_tid_t asof, int *errorp);
-int    hammer_update_inode(hammer_inode_t ip);
+                       u_int64_t obj_id, hammer_tid_t asof, int flags,
+                       int *errorp);
 void   hammer_put_inode(struct hammer_inode *ip);
 void   hammer_put_inode_ref(struct hammer_inode *ip);
 
-int    hammer_unload_inode(hammer_inode_t ip, void *data __unused);
+int    hammer_unload_inode(hammer_inode_t ip, void *data);
 int    hammer_unload_volume(hammer_volume_t volume, void *data __unused);
 int    hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused);
 int    hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused);
@@ -496,6 +499,7 @@ void        hammer_to_timespec(hammer_tid_t tid, struct timespec *ts);
 hammer_tid_t hammer_timespec_to_transid(struct timespec *ts);
 hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans);
 hammer_tid_t hammer_now_tid(void);
+hammer_tid_t hammer_str_to_tid(const char *str);
 hammer_tid_t hammer_alloc_recid(hammer_transaction_t trans);
 
 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
@@ -590,6 +594,8 @@ void hammer_init_alist_config(void);
 
 void hammer_start_transaction(struct hammer_transaction *trans,
                              struct hammer_mount *hmp);
+void hammer_start_transaction_tid(struct hammer_transaction *trans,
+                                 struct hammer_mount *hmp, hammer_tid_t tid);
 void hammer_commit_transaction(struct hammer_transaction *trans);
 void hammer_abort_transaction(struct hammer_transaction *trans);
 
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index e8389b4..115dd7a 100644
--- a/sys/vfs/hammer/hammer_btree.c
+++ b/sys/vfs/hammer/hammer_btree.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.14 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.15 2008/01/01 01:00:03 dillon Exp $
  */
 
 /*
@@ -170,7 +170,6 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                /*
                 * Check internal or leaf element.  Determine if the record
                 * at the cursor has gone beyond the end of our range.
-                * Remember that our key range is end-exclusive.
                 *
                 * Generally we recurse down through internal nodes.  An
                 * internal node can only be returned if INCLUSTER is set
@@ -610,11 +609,12 @@ btree_search(hammer_cursor_t cursor, int flags)
        flags |= cursor->flags;
 
        if (hammer_debug_btree) {
-               kprintf("SEARCH   %p:%d %016llx %02x %016llx\n",
+               kprintf("SEARCH   %p:%d %016llx %02x key=%016llx tid=%016llx\n",
                        cursor->node, cursor->index,
                        cursor->key_beg.obj_id,
                        cursor->key_beg.rec_type,
-                       cursor->key_beg.key
+                       cursor->key_beg.key,
+                       cursor->key_beg.create_tid
                );
        }
 
@@ -744,6 +744,14 @@ btree_search(hammer_cursor_t cursor, int flags)
                /*
                 * Scan the node to find the subtree index to push down into.
                 * We go one-past, then back-up.
+                *
+                * We have a serious issue with the midpoints for internal
+                * nodes when the midpoint bisects two historical records
+                * (where only create_tid is different).  Short of iterating
+                * through the record's entire history the only solution is
+                * to calculate a midpoint that isn't a midpoint in that
+                * case.   Please see hammer_make_separator() for more
+                * information.
                 */
                for (i = 0; i < node->count; ++i) {
                        r = hammer_btree_cmp(&cursor->key_beg,
@@ -794,13 +802,22 @@ btree_search(hammer_cursor_t cursor, int flags)
                         * index must be PAST the last element to prevent an
                         * iteration from returning elements to the left of
                         * key_beg.
+                        *
+                        * NOTE: For the case where the right hand boundary
+                        * separates two historical records (where only
+                        * create_tid differs), we rely on the boundary
+                        * being exactly equal to the next record.  This
+                        * is handled by hammer_make_separator().  If this
+                        * were not true we would have to fall through for
+                        * the r == 1 case.
                         */
-                       if ((flags & HAMMER_CURSOR_INSERT) == 0 &&
-                           hammer_btree_cmp(&cursor->key_beg,
-                                            &node->elms[i].base) >= 0
-                       ) {
-                               cursor->index = i;
-                               return(ENOENT);
+                       if ((flags & HAMMER_CURSOR_INSERT) == 0) {
+                               r = hammer_btree_cmp(&cursor->key_beg,
+                                                    &node->elms[i].base);
+                               if (r >= 0) {
+                                       cursor->index = i;
+                                       return(ENOENT);
+                               }
                        }
 
                        /*
@@ -824,11 +841,12 @@ btree_search(hammer_cursor_t cursor, int flags)
 
                if (hammer_debug_btree) {
                        hammer_btree_elm_t elm = &node->elms[i];
-                       kprintf("SEARCH-I %p:%d %016llx %02x %016llx\n",
+                       kprintf("SEARCH-I %p:%d %016llx %02x key=%016llx tid=%016llx\n",
                                cursor->node, i,
                                elm->internal.base.obj_id,
                                elm->internal.base.rec_type,
-                               elm->internal.base.key
+                               elm->internal.base.key,
+                               elm->internal.base.create_tid
                        );
                }
 
@@ -931,15 +949,24 @@ btree_search(hammer_cursor_t cursor, int flags)
                r = hammer_btree_cmp(&cursor->key_beg, &node->elms[i].base);
 
                /*
-                * Stop if we've flipped past key_beg
+                * Stop if we've flipped past key_beg.  This includes a
+                * record whose create_tid is larger than our as-of id.
                 */
                if (r < 0)
                        break;
 
                /*
-                * Return an exact match
+                * Return an exact match.  In this case we have to do special
+                * checks if the only difference in the records is the
+                * create_tid, in order to properly match against our as-of
+                * query.
                 */
-               if (r == 0) {
+               if (r >= 0 && r <= 1) {
+                       if ((cursor->flags & HAMMER_CURSOR_ALLHISTORY) == 0 &&
+                           hammer_btree_chkts(cursor->key_beg.create_tid,
+                                              &node->elms[i].base) != 0) {
+                               continue;
+                       }
                        cursor->index = i;
                        error = 0;
                        if (hammer_debug_btree) {
@@ -1805,6 +1832,9 @@ done:
 /*
  * Compare two B-Tree elements, return -N, 0, or +N (e.g. similar to strcmp).
  *
+ * Note that for this particular function a return value of -1, 0, or +1
+ * can denote a match if create_tid is otherwise discounted.
+ *
  * See also hammer_rec_rb_compare() and hammer_rec_cmp() in hammer_object.c.
  */
 int
@@ -1824,6 +1854,11 @@ hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2)
                return(-2);
        if (key1->key > key2->key)
                return(2);
+
+       if (key1->create_tid < key2->create_tid)
+               return(-1);
+       if (key1->create_tid > key2->create_tid)
+               return(1);
        return(0);
 }
 
@@ -1845,14 +1880,13 @@ hammer_btree_chkts(hammer_tid_t create_tid, hammer_base_elm_t base)
  * Create a separator half way inbetween key1 and key2.  For fields just
  * one unit apart, the separator will match key2.
  *
- * The handling of delete_tid is a little confusing.  It is only possible
- * to have one record in the B-Tree where all fields match except delete_tid.
- * This means, worse case, two adjacent elements may have a create_tid that
- * is one-apart and cause the separator to choose the right-hand element's
- * create_tid.  e.g.  (create,delete):  (1,x)(2,x) -> separator is (2,x).
+ * At the moment require that the separator never match key2 exactly.
  *
- * So all we have to do is set delete_tid to the right-hand element to
- * guarentee that the separator is properly between the two elements.
+ * We have to special case the separator between two historical keys,
+ * where all elements except create_tid match.  In this case our B-Tree
+ * searches can't figure out which branch of an internal node to go down
+ * unless the mid point's create_tid is exactly key2.
+ * (see btree_search()'s scan code on HAMMER_BTREE_TYPE_INTERNAL).
  */
 #define MAKE_SEPARATOR(key1, key2, dest, field)        \
        dest->field = key1->field + ((key2->field - key1->field + 1) >> 1);
@@ -1865,8 +1899,13 @@ hammer_make_separator(hammer_base_elm_t key1, hammer_base_elm_t key2,
        MAKE_SEPARATOR(key1, key2, dest, obj_id);
        MAKE_SEPARATOR(key1, key2, dest, rec_type);
        MAKE_SEPARATOR(key1, key2, dest, key);
-       MAKE_SEPARATOR(key1, key2, dest, create_tid);
-       dest->delete_tid = key2->delete_tid;
+       if (key1->obj_id == key2->obj_id &&
+           key1->rec_type == key2->rec_type &&
+           key1->key == key2->key) {
+               dest->create_tid = key2->create_tid;
+       } else {
+               dest->create_tid = 0;
+       }
 }
 
 #undef MAKE_SEPARATOR
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index ae41173..2e26d69 100644
--- a/sys/vfs/hammer/hammer_inode.c
+++ b/sys/vfs/hammer/hammer_inode.c
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.15 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.16 2008/01/01 01:00:03 dillon Exp $
  */
 
 #include "hammer.h"
 #include <sys/buf.h>
 #include <sys/buf2.h>
 
+/*
+ * The kernel is not actively referencing this vnode but is still holding
+ * it cached.
+ */
 int
 hammer_vop_inactive(struct vop_inactive_args *ap)
 {
@@ -55,13 +59,22 @@ hammer_vop_inactive(struct vop_inactive_args *ap)
         * If the inode no longer has any references we recover its
         * in-memory resources immediately.
         */
-       if (ip->ino_rec.ino_nlinks == 0 &&
-           (ip->hmp->mp->mnt_flag & MNT_RDONLY) == 0) {
-               hammer_sync_inode(ip, MNT_NOWAIT, 1);
-       }
+       if (ip->ino_rec.ino_nlinks == 0)
+               vrecycle(ap->a_vp);
        return(0);
 }
 
+/*
+ * Release the vnode association.  This is typically (but not always)
+ * the last reference on the inode and will flush the inode to the
+ * buffer cache.
+ *
+ * XXX Currently our sync code only runs through inodes with vnode
+ * associations, so we depend on hammer_rel_inode() to sync any inode
+ * record data to the block device prior to losing the association.
+ * Otherwise transactions that the user expected to be distinct by
+ * doing a manual sync may be merged.
+ */
 int
 hammer_vop_reclaim(struct vop_reclaim_args *ap)
 {
@@ -70,13 +83,10 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap)
 
        vp = ap->a_vp;
 
-       /*
-        * Release the vnode association and ask that the inode be flushed.
-        */
        if ((ip = vp->v_data) != NULL) {
                vp->v_data = NULL;
                ip->vp = NULL;
-               hammer_rel_inode(ip, 1);
+               hammer_rel_inode(ip, 0);
        }
        return(0);
 }
@@ -97,7 +107,7 @@ hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
         * unlocked while we manipulate the related vnode to avoid a
         * deadlock.
         */
-       ip = hammer_get_inode(hmp, ino, hmp->asof, &error);
+       ip = hammer_get_inode(hmp, ino, hmp->asof, 0, &error);
        if (ip == NULL) {
                *vpp = NULL;
                return(error);
@@ -179,10 +189,13 @@ hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
  * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
  * do not attach or detach the related vnode (use hammer_get_vnode() for
  * that).
+ *
+ * The flags argument is only applied for newly created inodes, and only
+ * certain flags are inherited.
  */
 struct hammer_inode *
 hammer_get_inode(struct hammer_mount *hmp, u_int64_t obj_id, hammer_tid_t asof,
-                int *errorp)
+                int flags, int *errorp)
 {
        struct hammer_inode_info iinfo;
        struct hammer_cursor cursor;
@@ -207,6 +220,9 @@ loop:
        ip->obj_id = obj_id;
        ip->obj_asof = iinfo.obj_asof;
        ip->hmp = hmp;
+       ip->flags = flags & HAMMER_INODE_RO;
+       if (hmp->ronly)
+               ip->flags |= HAMMER_INODE_RO;
        RB_INIT(&ip->rec_tree);
 
        /*
@@ -229,12 +245,12 @@ loop:
        /*
         * On success the B-Tree lookup will hold the appropriate
         * buffer cache buffers and provide a pointer to the requested
-        * information.  Copy the information to the in-memory inode.
+        * information.  Copy the information to the in-memory inode
+        * and cache the B-Tree node to improve future operations.
         */
        if (*errorp == 0) {
                ip->ino_rec = cursor.record->inode;
                ip->ino_data = cursor.data->inode;
-       } else if (cursor.node) {
                hammer_cache_node(cursor.node, &ip->cache);
        }
 
@@ -352,13 +368,17 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
        return(0);
 }
 
-int
+/*
+ * Called by hammer_sync_inode().
+ */
+static int
 hammer_update_inode(hammer_inode_t ip)
 {
        struct hammer_cursor cursor;
        struct hammer_cursor *spike = NULL;
        hammer_record_t record;
        int error;
+       hammer_tid_t last_tid;
 
        /*
         * Locate the record on-disk and mark it as deleted.  Both the B-Tree
@@ -371,6 +391,7 @@ hammer_update_inode(hammer_inode_t ip)
         * XXX Update the inode record and data in-place if the retention
         * policy allows it.
         */
+       last_tid = ip->last_tid;
 retry:
        error = 0;
 
@@ -388,7 +409,7 @@ retry:
                error = hammer_btree_lookup(&cursor);
 
                if (error == 0) {
-                       error = hammer_ip_delete_record(&cursor, ip->last_tid);
+                       error = hammer_ip_delete_record(&cursor, last_tid);
                        if (error == 0)
                                ip->flags |= HAMMER_INODE_DELONDISK;
                }
@@ -407,7 +428,7 @@ retry:
        if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { 
                record = hammer_alloc_mem_record(ip);
                record->rec.inode = ip->ino_rec;
-               record->rec.inode.base.base.create_tid = ip->last_tid;
+               record->rec.inode.base.base.create_tid = last_tid;
                record->rec.inode.base.data_len = sizeof(ip->ino_data);
                record->data = (void *)&ip->ino_data;
                error = hammer_ip_sync_record(record, &spike);
@@ -420,8 +441,10 @@ retry:
                }
                KKASSERT(spike == NULL);
                if (error == 0) {
-                       ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
-                                      HAMMER_INODE_DELONDISK);
+                       ip->flags &= ~(HAMMER_INODE_RDIRTY |
+                                      HAMMER_INODE_DDIRTY |
+                                      HAMMER_INODE_DELONDISK |
+                                      HAMMER_INODE_ITIMES);
                        if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
                                hammer_modify_volume(ip->hmp->rootvol);
                                ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
@@ -430,22 +453,65 @@ retry:
                        }
                }
        }
-       if (error == 0)
-               ip->flags &= ~HAMMER_INODE_TID;
        return(error);
 }
 
 /*
- * Release a reference on an inode and unload it if told to flush.
+ * Update only the itimes fields.  This is done non-historically.  The
+ * record is updated in-place on the disk.
+ */
+static int
+hammer_update_itimes(hammer_inode_t ip)
+{
+       struct hammer_cursor cursor;
+       struct hammer_inode_record *rec;
+       int error;
+
+       error = 0;
+       if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
+           HAMMER_INODE_ONDISK) {
+               hammer_init_cursor_ip(&cursor, ip);
+               cursor.key_beg.obj_id = ip->obj_id;
+               cursor.key_beg.key = 0;
+               cursor.key_beg.create_tid = ip->obj_asof;
+               cursor.key_beg.delete_tid = 0;
+               cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
+               cursor.key_beg.obj_type = 0;
+               cursor.flags = HAMMER_CURSOR_GET_RECORD;
+
+               error = hammer_btree_lookup(&cursor);
+
+               if (error == 0) {
+                       rec = &cursor.record->inode;
+                       hammer_modify_buffer(cursor.record_buffer);
+                       rec->ino_atime = ip->ino_rec.ino_atime;
+                       rec->ino_mtime = ip->ino_rec.ino_mtime;
+                       hammer_modify_buffer_done(cursor.record_buffer);
+                       ip->flags &= ~HAMMER_INODE_ITIMES;
+                       /* XXX recalculate crc */
+               }
+               hammer_cache_node(cursor.node, &ip->cache);
+               hammer_done_cursor(&cursor);
+       }
+       return(error);
+}
+
+/*
+ * Release a reference on an inode.  If asked to flush the last release
+ * will flush the inode.
  */
 void
 hammer_rel_inode(struct hammer_inode *ip, int flush)
 {
        hammer_unref(&ip->lock);
-       if (flush || ip->ino_rec.ino_nlinks == 0)
+       if (flush)
                ip->flags |= HAMMER_INODE_FLUSH;
-       if (ip->lock.refs == 0 && (ip->flags & HAMMER_INODE_FLUSH))
-               hammer_unload_inode(ip, NULL);
+       if (ip->lock.refs == 0) {
+               if (ip->flags & HAMMER_INODE_FLUSH)
+                       hammer_unload_inode(ip, (void *)MNT_WAIT);
+               else
+                       hammer_unload_inode(ip, (void *)MNT_NOWAIT);
+       }
 }
 
 /*
@@ -454,7 +520,7 @@ hammer_rel_inode(struct hammer_inode *ip, int flush)
  * (called via RB_SCAN)
  */
 int
-hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
+hammer_unload_inode(struct hammer_inode *ip, void *data)
 {
        int error;
 
@@ -463,29 +529,48 @@ hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
        KKASSERT(ip->vp == NULL);
        hammer_ref(&ip->lock);
 
-       error = hammer_sync_inode(ip, MNT_WAIT, 1);
+       error = hammer_sync_inode(ip, (int)data, 1);
        if (error)
                kprintf("hammer_sync_inode failed error %d\n", error);
-       KKASSERT(RB_EMPTY(&ip->rec_tree));
-       RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
+       if (ip->lock.refs == 1) {
+               KKASSERT(RB_EMPTY(&ip->rec_tree));
+               RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
 
-       hammer_uncache_node(&ip->cache);
-       --hammer_count_inodes;
-       kfree(ip, M_HAMMER);
+               hammer_uncache_node(&ip->cache);
+               --hammer_count_inodes;
+               kfree(ip, M_HAMMER);
+       } else {
+               hammer_unref(&ip->lock);
+       }
        return(0);
 }
 
 /*
- * A transaction has modified an inode, requiring a new record and possibly
- * also data to be written out.
+ * A transaction has modified an inode, requiring updates as specified by
+ * the passed flags.
  *
- * last_tid is the TID to use for the disk sync.
+ * HAMMER_INODE_RDIRTY:        Inode record has been updated
+ * HAMMER_INODE_DDIRTY: Inode data has been updated
+ * HAMMER_INODE_DELETED: Inode record/data must be deleted
+ * HAMMER_INODE_ITIMES: mtime/atime has been updated
+ *
+ * last_tid is the TID to use to generate the correct TID when the inode
+ * is synced to disk.
  */
 void
 hammer_modify_inode(struct hammer_transaction *trans,
                    struct hammer_inode *ip, int flags)
 {
-       if ((flags & HAMMER_INODE_TID) && (ip->flags & HAMMER_INODE_TID) == 0) {
+       KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
+                 (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
+                  HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES) == 0);
+
+       if (flags &
+           (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|HAMMER_INODE_DELETED)) {
+               if (hammer_debug_tid) {
+                       kprintf("hammer_modify_inode: %016llx (%08x)\n", 
+                               trans->tid, (int)(trans->tid / 1000000000LL));
+               }
                ip->last_tid = trans->tid;
        }
        ip->flags |= flags;
@@ -532,8 +617,19 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
        struct hammer_cursor *spike = NULL;
        int error;
 
+       if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
+               return(0);
+       }
+
        hammer_lock_ex(&ip->lock);
-       hammer_start_transaction(&trans, ip->hmp);
+
+       /*
+        * Use the transaction id of the last operation to sync.
+        */
+       if (ip->last_tid)
+               hammer_start_transaction_tid(&trans, ip->hmp, ip->last_tid);
+       else
+               hammer_start_transaction(&trans, ip->hmp);
 
        /*
         * If the inode has been deleted (nlinks == 0), and the OS no longer
@@ -552,15 +648,15 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
         * so we can pass a NULL spike structure, because no partial data
         * deletion can occur (yet).
         */
-       if (ip->ino_rec.ino_nlinks == 0 && handle_delete) {
+       if (ip->ino_rec.ino_nlinks == 0 && handle_delete && 
+           (ip->flags & HAMMER_INODE_GONE) == 0) {
+               ip->flags |= HAMMER_INODE_GONE;
                if (ip->vp)
                        vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
                error = hammer_ip_delete_range_all(&trans, ip);
                KKASSERT(RB_EMPTY(&ip->rec_tree));
-               ip->flags &= ~HAMMER_INODE_TID;
                ip->ino_rec.base.base.delete_tid = trans.tid;
-               hammer_modify_inode(&trans, ip,
-                                   HAMMER_INODE_DELETED | HAMMER_INODE_TID);
+               hammer_modify_inode(&trans, ip, HAMMER_INODE_DELETED);
                hammer_modify_volume(ip->hmp->rootvol);
                --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
                hammer_modify_volume_done(ip->hmp->rootvol);
@@ -590,6 +686,8 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
                }
                break;
        }
+       if (RB_EMPTY(&ip->rec_tree))
+               ip->flags &= ~HAMMER_INODE_XDIRTY;
 
        /*
         * Now update the inode's on-disk inode-data and/or on-disk record.
@@ -606,7 +704,8 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
                 * Take care of the case where a deleted inode was never
                 * flushed to the disk in the first place.
                 */
-               ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY);
+               ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
+                              HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
                while (RB_ROOT(&ip->rec_tree)) {
                        hammer_record_t rec = RB_ROOT(&ip->rec_tree);
                        hammer_ref(&rec->lock);
@@ -629,11 +728,21 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
        }
 
        /*
-        * If RDIRTY or DDIRTY is set, write out a new record.  If the
-        * inode is already on-disk, the old record is marked as deleted.
+        * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
+        * is already on-disk the old record is marked as deleted.
+        *
+        * If DELETED is set hammer_update_inode() will delete the existing
+        * record without writing out a new one.
+        *
+        * If *ONLY* the ITIMES flag is set we can update the record in-place.
         */
+       if ((ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
+                        HAMMER_INODE_ITIMES | HAMMER_INODE_DELETED)) ==
+           HAMMER_INODE_ITIMES) {
+               error = hammer_update_itimes(ip);
+       } else
        if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
-                        HAMMER_INODE_DELETED)) {
+                        HAMMER_INODE_ITIMES | HAMMER_INODE_DELETED)) {
                error = hammer_update_inode(ip);
        }
        hammer_commit_transaction(&trans);
diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c
index b09a8d5..4fa4411 100644
--- a/sys/vfs/hammer/hammer_io.c
+++ b/sys/vfs/hammer/hammer_io.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.10 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.11 2008/01/01 01:00:03 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -154,8 +154,8 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io)
        } else {
                if (io->released) {
                        regetblk(bp);
-                       io->released = 0;
                        BUF_KERNPROC(bp);
+                       io->released = 0;
                }
        }
        io->modified = 1;
@@ -183,7 +183,8 @@ hammer_io_notify_cluster(hammer_cluster_t cluster)
                if (cluster->state == HAMMER_CLUSTER_IDLE) {
                        if (io->released)
                                regetblk(io->bp);
-                       io->released = 1;
+                       else
+                               io->released = 1;
                        kprintf("MARK CLUSTER OPEN\n");
                        cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
                        cluster->state = HAMMER_CLUSTER_ASYNC;
@@ -367,8 +368,8 @@ hammer_io_intend_modify(struct hammer_io *io)
                hammer_lock_ex(&io->lock);
                if (io->released) {
                        regetblk(io->bp);
-                       io->released = 0;
                        BUF_KERNPROC(io->bp);
+                       io->released = 0;
                }
                hammer_unlock(&io->lock);
        }
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index 350c7cf..60e5e0d 100644
--- a/sys/vfs/hammer/hammer_object.c
+++ b/sys/vfs/hammer/hammer_object.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.13 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.14 2008/01/01 01:00:03 dillon Exp $
  */
 
 #include "hammer.h"
@@ -342,8 +342,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        bcopy(ncp->nc_name, record->data, bytes);
        record->rec.entry.base.data_len = bytes;
        ++ip->ino_rec.ino_nlinks;
-       hammer_modify_inode(trans, ip,
-                           HAMMER_INODE_RDIRTY | HAMMER_INODE_TID);
+       hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
        error = hammer_mem_add(trans, record);
        return(error);
 }
@@ -365,15 +364,16 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
 
        /*
         * One less link.  The file may still be open in the OS even after
-        * all links have gone away so we don't destroy the inode's data
-        * here.
+        * all links have gone away so we only try to sync if the OS has
+        * no references and nlinks falls to 0.
         */
        if (error == 0) {
                --ip->ino_rec.ino_nlinks;
-               hammer_modify_inode(trans, ip,
-                                   HAMMER_INODE_RDIRTY | HAMMER_INODE_TID);
-               if (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))
+               hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
+               if (ip->ino_rec.ino_nlinks == 0 &&
+                   (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) {
                        hammer_sync_inode(ip, MNT_NOWAIT, 1);
+               }
 
        }
        return(error);
@@ -415,8 +415,7 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
                        record->flags |= HAMMER_RECF_EMBEDDED_DATA;
                }
        }
-       hammer_modify_inode(trans, ip,
-                           HAMMER_INODE_RDIRTY | HAMMER_INODE_TID);
+       hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
        error = hammer_mem_add(trans, record);
        return(error);
 }
@@ -803,6 +802,7 @@ hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
                record->rec.base.base.key |= trans->hmp->namekey_iterator;
        }
        record->flags |= HAMMER_RECF_ONRBTREE;
+       hammer_modify_inode(trans, record->ip, HAMMER_INODE_XDIRTY);
        hammer_rel_mem_record(record);
        return(0);
 }
@@ -1158,7 +1158,7 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip,
                         * we missing a + 1 somewhere?  Note that ran_end
                         * could overflow.
                         */
-                       if (base->key > ran_end) {
+                       if (base->key - 1 > ran_end) {
                                if (base->key - rec->base.data_len > ran_end) {
                                        kprintf("right edge OOB\n");
                                        break;
@@ -1269,6 +1269,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
        if (error == 0) {
                hammer_modify_buffer(cursor->record_buffer);
                cursor->record->base.base.delete_tid = tid;
+
                hammer_modify_buffer_done(cursor->record_buffer);
                hammer_modify_node(cursor->node);
                elm = &cursor->node->ondisk->elms[cursor->index];
diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c
index d7cc4cf..bb69e6b 100644
--- a/sys/vfs/hammer/hammer_ondisk.c
+++ b/sys/vfs/hammer/hammer_ondisk.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.14 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.15 2008/01/01 01:00:03 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -1341,11 +1341,11 @@ hammer_rel_node(hammer_node_t node)
                 * Destroy the node if it is being deleted.  Free the node
                 * in the bitmap after we have unhooked it.
                 */
+               hammer_ref_cluster(cluster);
                if (node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) {
                        hammer_flush_node(node);
                        RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
                                  node);
-                       hammer_ref_cluster(cluster);
                        if ((buffer = node->buffer) != NULL) {
                                node->buffer = NULL;
                                hammer_remove_node_clist(buffer, node);
@@ -1406,11 +1406,18 @@ hammer_rel_node(hammer_node_t node)
                                hammer_remove_node_clist(buffer, node);
                        }
                        --hammer_count_nodes;
+                       node->lock.refs = -1;   /* sanity */
                        kfree(node, M_HAMMER);
-                       return;
+               } else {
+                       hammer_unref(&node->lock);
                }
 
-               hammer_unref(&node->lock);
+               /*
+                * We have to do this last, after the node has been removed
+                * from the cluster's RB tree or we risk a deadlock due to
+                * hammer_rel_buffer->hammer_rel_cluster->(node deadlock)
+                */
+               hammer_rel_cluster(cluster, 0);
        } else {
                hammer_unref(&node->lock);
        }
@@ -1440,15 +1447,10 @@ hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
         */
 again:
        if (node->cache1 != cache) {
-               if (node->cache2 == cache) {
-                       struct hammer_node **tmp;
-                       tmp = node->cache1;
-                       node->cache1 = node->cache2;
-                       node->cache2 = tmp;
-               } else {
+               if (node->cache2 != cache) {
                        if ((old = *cache) != NULL) {
-                               *cache = NULL;
-                               hammer_flush_node(old); /* can block */
+                               KKASSERT(node->lock.refs != 0);
+                               hammer_uncache_node(cache);
                                goto again;
                        }
                        if (node->cache2)
@@ -1456,6 +1458,11 @@ again:
                        node->cache2 = node->cache1;
                        node->cache1 = cache;
                        *cache = node;
+               } else {
+                       struct hammer_node **tmp;
+                       tmp = node->cache1;
+                       node->cache1 = node->cache2;
+                       node->cache2 = tmp;
                }
        }
 }
@@ -1996,8 +2003,10 @@ alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
         * the allocation.
         */
        if (buffer) {
+#if 0
                kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
                        buf_no, type, nelements);
+#endif
                hammer_modify_buffer(buffer);  /*XXX*/
                hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
                                  nelements);
@@ -2021,6 +2030,7 @@ hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
        info.error = 0;
        info.waitfor = waitfor;
 
+       kprintf("hammer_sync\n");
        vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
                      hammer_sync_scan1, hammer_sync_scan2, &info);
 
diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c
index 587f8c8..ad7614c 100644
--- a/sys/vfs/hammer/hammer_subs.c
+++ b/sys/vfs/hammer/hammer_subs.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.9 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.10 2008/01/01 01:00:03 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -297,3 +297,12 @@ hammer_now_tid(void)
        return(tid);
 }
 
+hammer_tid_t
+hammer_str_to_tid(const char *str)
+{
+       hammer_tid_t tid;
+
+       tid = strtoq(str, NULL, 16) * 1000000000LL;
+       return(tid);
+}
+
diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c
index 5c09372..c76fe74 100644
--- a/sys/vfs/hammer/hammer_transaction.c
+++ b/sys/vfs/hammer/hammer_transaction.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.5 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.6 2008/01/01 01:00:03 dillon Exp $
  */
 
 #include "hammer.h"
@@ -48,6 +48,18 @@ hammer_start_transaction(struct hammer_transaction *trans,
        trans->tid = hammer_alloc_tid(trans);
 }
 
+void
+hammer_start_transaction_tid(struct hammer_transaction *trans,
+                            struct hammer_mount *hmp, hammer_tid_t tid)
+{
+       int error;
+
+       trans->hmp = hmp;
+       trans->rootvol = hammer_get_root_volume(hmp, &error);
+       KKASSERT(error == 0);
+       trans->tid = tid;
+}
+
 void
 hammer_abort_transaction(struct hammer_transaction *trans)
 {
@@ -60,6 +72,11 @@ hammer_commit_transaction(struct hammer_transaction *trans)
        hammer_rel_volume(trans->rootvol, 0);
 }
 
+/*
+ * Note: Successive transaction ids must be at least 2 apart so the
+ * B-Tree code can make a separator that does not match either the
+ * left or right hand sides.
+ */
 hammer_tid_t
 hammer_alloc_tid(hammer_transaction_t trans)
 {
@@ -75,7 +92,11 @@ hammer_alloc_tid(hammer_transaction_t trans)
                tid = ondisk->vol0_nexttid;
        if (tid == 0xFFFFFFFFFFFFFFFFULL)
                panic("hammer_start_transaction: Ran out of TIDs!");
-       ondisk->vol0_nexttid = tid + 1;
+       if (hammer_debug_tid) {
+               kprintf("alloc_tid %016llx (0x%08x)\n",
+                       tid, (int)(tid / 1000000000LL));
+       }
+       ondisk->vol0_nexttid = tid + 2;
        hammer_modify_volume_done(trans->rootvol);
 
        return(tid);
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index dadc3a2..cfa43ca 100644
--- a/sys/vfs/hammer/hammer_vfsops.c
+++ b/sys/vfs/hammer/hammer_vfsops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.12 2007/12/31 05:33:12 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.13 2008/01/01 01:00:03 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -48,6 +48,7 @@
 #include "hammer.h"
 
 int hammer_debug_btree;
+int hammer_debug_tid;
 int hammer_count_inodes;
 int hammer_count_records;
 int hammer_count_record_datas;
@@ -61,6 +62,8 @@ int hammer_count_spikes;
 SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
           &hammer_debug_btree, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_tid, CTLFLAG_RW,
+          &hammer_debug_tid, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
           &hammer_count_inodes, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
@@ -311,7 +314,7 @@ hammer_free_hmp(struct mount *mp)
         * Unload & flush inodes
         */
        RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
-               hammer_unload_inode, NULL);
+               hammer_unload_inode, (void *)MNT_WAIT);
 
        /*
         * Unload & flush volumes
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index 5d23817..d5e8d33 100644
--- a/sys/vfs/hammer/hammer_vnops.c
+++ b/sys/vfs/hammer/hammer_vnops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.14 2007/12/31 05:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.15 2008/01/01 01:00:03 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -226,8 +226,10 @@ hammer_vop_read(struct vop_read_args *ap)
                        bqrelse(bp);
                        break;
                }
-               ip->ino_rec.ino_atime = trans.tid;
-               hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
+               if ((ip->flags & HAMMER_INODE_RO) == 0) {
+                       ip->ino_rec.ino_atime = trans.tid;
+                       hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
+               }
                bqrelse(bp);
        }
        hammer_commit_transaction(&trans);
@@ -255,6 +257,9 @@ hammer_vop_write(struct vop_write_args *ap)
        ip = VTOI(ap->a_vp);
        error = 0;
 
+       if (ip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -336,13 +341,13 @@ hammer_vop_write(struct vop_write_args *ap)
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                if (ip->ino_rec.ino_size < uio->uio_offset) {
                        ip->ino_rec.ino_size = uio->uio_offset;
-                       ip->ino_rec.ino_mtime = trans.tid;
-                       flags = HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES |
-                               HAMMER_INODE_TID;
+                       flags = HAMMER_INODE_RDIRTY;
                        vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
                } else {
-                       flags = HAMMER_INODE_TID;
+                       flags = 0;
                }
+               ip->ino_rec.ino_mtime = trans.tid;
+               flags |= HAMMER_INODE_ITIMES;
                hammer_modify_inode(&trans, ip, flags);
                if (ap->a_ioflag & IO_SYNC) {
                        bwrite(bp);
@@ -420,6 +425,9 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap)
        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
 
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -545,30 +553,50 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        int error;
        int i;
        int nlen;
+       int flags;
 
        /*
         * Misc initialization, plus handle as-of name extensions.  Look for
         * the '@@' extension.  Note that as-of files and directories cannot
         * be modified.
-        *
-        *
         */
        dip = VTOI(ap->a_dvp);
        ncp = ap->a_nch->ncp;
        asof = dip->obj_asof;
        nlen = ncp->nc_nlen;
+       flags = dip->flags;
 
        for (i = 0; i < nlen; ++i) {
                if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
-                       asof = hammer_now_tid() - 
-                              strtoq(ncp->nc_name + i + 2, NULL, 0) *
-                              1000000000LL;
+                       asof = hammer_str_to_tid(ncp->nc_name + i + 2);
                        kprintf("ASOF %016llx\n", asof);
+                       flags |= HAMMER_INODE_RO;
                        break;
                }
        }
        nlen = i;
 
+       /*
+        * If there is no path component the time extension is relative to
+        * dip.
+        */
+       if (nlen == 0) {
+               ip = hammer_get_inode(dip->hmp, dip->obj_id, asof,
+                                     flags, &error);
+               if (error == 0) {
+                       error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
+                       hammer_rel_inode(ip, 0);
+               } else {
+                       vp = NULL;
+               }
+               if (error == 0) {
+                       vn_unlock(vp);
+                       cache_setvp(ap->a_nch, vp);
+                       vrele(vp);
+               }
+               return(error);
+       }
+
        /*
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
@@ -611,8 +639,8 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                error = hammer_ip_next(&cursor);
        }
        if (error == 0) {
-               ip = hammer_get_inode(dip->hmp, rec->entry.obj_id,
-                                     asof, &error);
+               ip = hammer_get_inode(dip->hmp, rec->entry.obj_id, asof,
+                                     flags, &error);
                if (error == 0) {
                        error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
                        hammer_rel_inode(ip, 0);
@@ -646,14 +674,25 @@ int
 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
 {
        struct hammer_inode *dip;
+       struct hammer_inode *ip;
        u_int64_t parent_obj_id;
+       int error;
 
        dip = VTOI(ap->a_dvp);
        if ((parent_obj_id = dip->ino_data.parent_obj_id) == 0) {
                *ap->a_vpp = NULL;
                return ENOENT;
        }
-       return(hammer_vfs_vget(dip->hmp->mp, parent_obj_id, ap->a_vpp));
+
+       ip = hammer_get_inode(dip->hmp, parent_obj_id, dip->obj_asof,
+                             dip->flags, &error);
+       if (ip == NULL) {
+               *ap->a_vpp = NULL;
+               return(error);
+       }
+       error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp);
+       hammer_rel_inode(ip, 0);
+       return (error);
 }
 
 /*
@@ -673,6 +712,11 @@ hammer_vop_nlink(struct vop_nlink_args *ap)
        dip = VTOI(ap->a_dvp);
        ip = VTOI(ap->a_vp);
 
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+       if (ip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -717,6 +761,9 @@ hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
 
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -781,6 +828,9 @@ hammer_vop_nmknod(struct vop_nmknod_args *ap)
        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
 
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -829,6 +879,9 @@ static
 int
 hammer_vop_open(struct vop_open_args *ap)
 {
+       if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
+               return (EROFS);
+
        return(vop_stdopen(ap));
 }
 
@@ -1062,6 +1115,14 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
        tncp = ap->a_tnch->ncp;
        ip = VTOI(fncp->nc_vp);
        KKASSERT(ip != NULL);
+
+       if (fdip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+       if (tdip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+       if (ip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        hammer_start_transaction(&trans, fdip->hmp);
 
        /*
@@ -1171,6 +1232,8 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
 
        if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
                return(EROFS);
+       if (ip->flags & HAMMER_INODE_RO)
+               return (EROFS);
 
        hammer_start_transaction(&trans, ip->hmp);
        error = 0;
@@ -1268,8 +1331,6 @@ done:
        if (error) {
                hammer_abort_transaction(&trans);
        } else {
-               if (modflags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY))
-                       modflags |= HAMMER_INODE_TID;
                hammer_modify_inode(&trans, ip, modflags);
                hammer_commit_transaction(&trans);
        }
@@ -1296,6 +1357,9 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
 
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        /*
         * Create a transaction to cover the operations we perform.
         */
@@ -1549,6 +1613,10 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = ap->a_vp->v_data;
+
+       if (ip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        hammer_start_transaction(&trans, ip->hmp);
 
 retry:
@@ -1627,6 +1695,10 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
         */
        dip = VTOI(dvp);
        ncp = nch->ncp;
+
+       if (dip->flags & HAMMER_INODE_RO)
+               return (EROFS);
+
        namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
 
        hammer_init_cursor_ip(&cursor, dip);
@@ -1672,7 +1744,7 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
         */
        if (error == 0) {
                ip = hammer_get_inode(dip->hmp, rec->entry.obj_id,
-                                     dip->hmp->asof, &error);
+                                     dip->hmp->asof, 0, &error);
                if (error == 0 && ip->ino_rec.base.base.obj_type ==
                                  HAMMER_OBJTYPE_DIRECTORY) {
                        error = hammer_ip_check_directory_empty(&trans, ip);